# Load the EIN -> company lookup table and the endowment filings.
companies_to_ein <- read_rds(here("data", "companies.RDS"))

# Reshape the endowment filings to long form: every column other than EIN and
# fiscal_year becomes a (variable_name, value) pair. The two *OrgInd indicator
# columns are dropped first.
endowment_data <- here("data", "endowments_by_most_recent_filings.RDS") %>%
  read_rds() %>%
  select(-c(EndowmentsHeldUnrelatedOrgInd, EndowmentsHeldRelatedOrgInd)) %>%
  pivot_longer(cols = -c(EIN, fiscal_year), names_to = "variable_name") %>%
  # no `by` is given, so the join uses whatever columns the two tables share
  left_join(companies_to_ein) %>%
  # go through character so a factor-coded year converts by label, not by level
  mutate(fiscal_year = as.numeric(as.character(fiscal_year)))
# Extract the tax-period end date of every return.
# get_df() and filter_ein() are not defined in this section; presumably they
# come from GET_VARS.R, which is sourced here -- TODO confirm.
source(here("GET_VARS.R"))

files <- dir(here("ballet_990_released_20230208"), full.names = TRUE)

# Run get_df() on each file and row-bind the results.
dates <- map_df(
  files,
  function(path) {
    get_df(
      filename = path,
      variables = c("//Return//ReturnHeader//TaxPeriodEndDt")
    )
  }
) %>%
  mutate(fiscal_year = as.numeric(as.character(fiscal_year))) %>%
  filter_ein()

# Write the extracted dates to disk, then read them back keeping only the
# columns used below.
saveRDS(dates, here("data", "dates.RDS"))
dates <- here("data", "dates.RDS") %>%
  readRDS() %>%
  select(EIN, TaxPeriodEndDt, fiscal_year)

# Attach the dates to the long endowment table; no `by` is given, so the join
# uses the columns the two tables share.
endowment_data <- endowment_data %>%
  mutate(fiscal_year = as.numeric(as.character(fiscal_year))) %>%
  left_join(dates)
# Plot, per fiscal year, each organization's rank in `var1` against its rank
# in `var2`.
#
# Arguments:
#   var1, var2  Column names (character strings) of the variables to rank.
#   data        Data frame containing `fiscal_year`, `organization_name`,
#               `EIN` and the two columns named by `var1` and `var2`.
#
# Ranks are computed within each fiscal year on the negated values, so rank 1
# is the largest value. `na.last = "keep"` is passed to rank() itself, so a
# missing value gets an NA rank instead of being ranked after everything else.
# (Previously the argument sat outside the rank() call, which made mutate()
# create a literal `na.last` column and left rank() on its default.)
#
# Returns the ggplotly() widget after partial_bundle(). The margin object `m`
# is read from the calling environment; it is not defined in this function.
plot_ranks <- function(var1, var2, data) {
  rank1 <- paste0(var1, "_rank")
  rank2 <- paste0(var2, "_rank")

  ranked <- data %>%
    group_by(fiscal_year) %>%
    mutate(
      "{var1}_rank" := rank(-!!sym(var1), na.last = "keep"),
      "{var2}_rank" := rank(-!!sym(var2), na.last = "keep")
    )

  plt <- ggplot(
    ranked,
    aes(
      x = !!sym(rank1),
      y = !!sym(rank2),
      color = organization_name,
      label = EIN
    )
  ) +
    geom_point() +
    # identity line: points on it have the same rank in both variables
    geom_function(fun = function(x) x, color = "darkred", alpha = .8) +
    labs(
      x = paste0(var1, " Rank"),
      y = paste0(var2, " Rank"),
      title = glue("Rank of {var2} vs. Rank of {var1}")
    ) +
    theme_bw() +
    viridis::scale_color_viridis(
      discrete = TRUE,
      option = "rocket",
      end = .9
    ) +
    facet_wrap(~fiscal_year) +
    theme(
      plot.title = element_text(size = 14, hjust = .5, face = "bold"),
      plot.subtitle = element_text(hjust = .5, face = "italic", size = 14),
      axis.title = element_text(size = 13, face = "bold")
    )

  ggplotly(plt, margin = m, height = 550) %>%
    partial_bundle()
}
# Rank-vs-rank scatter plot (fiscal years 2011-2020), colored by how
# consistently each organization ranks in `var1` relative to `var2`.
#
# NOTE: a second function with the same name is defined later in this file;
# when the file is run top to bottom, that later definition replaces this one.
#
# Arguments:
#   var1, var2  Column names (character strings) of the variables to rank.
#   data        Data frame containing `fiscal_year`, `EIN`,
#               `organization_name` and the columns named by `var1`/`var2`.
#
# Ranks are computed within each fiscal year on the negated values (rank 1 is
# the largest value); missing values keep an NA rank because `na.last = "keep"`
# is passed to rank() itself (it was previously outside the call, where it
# only created a stray `na.last` column).
#
# rank_diff = var2 rank - var1 rank, so a positive difference means the
# organization sits higher (smaller rank number) in `var1` that year.
# Per EIN, the years with positive / negative / zero difference are counted
# and turned into a category:
#   * every compared year tied           -> "Always Ranked the Same"
#   * every compared year positive       -> "Always Ranked Higher in <var1>"
#   * every compared year negative       -> "Always Ranked Higher in <var2>"
#   * anything else (any mix)            -> "Had Some Change"
#   * no comparable year at all          -> NA
# The catch-all branch also covers mixes that involve ties (e.g. some positive,
# some zero), which previously matched no branch and silently became NA.
#
# Returns the ggplotly() widget after partial_bundle(). `m` and `theme_c()`
# are taken from the calling environment; they are not defined here.
plot_ranks_by_consistency <- function(var1, var2, data) {
  rank1 <- paste0(var1, "_rank")
  rank2 <- paste0(var2, "_rank")

  ranked <- data %>%
    filter(fiscal_year > 2010 & fiscal_year < 2021) %>%
    group_by(fiscal_year) %>%
    mutate(
      "{var1}_rank" := rank(-!!sym(var1), na.last = "keep"),
      "{var2}_rank" := rank(-!!sym(var2), na.last = "keep"),
      rank_diff = !!sym(rank2) - !!sym(rank1)
    ) %>%
    group_by(EIN) %>%
    mutate(
      sum_pos = sum(rank_diff > 0, na.rm = TRUE),
      sum_neg = sum(rank_diff < 0, na.rm = TRUE),
      sum_zero = sum(rank_diff == 0, na.rm = TRUE)
    ) %>%
    ungroup() %>%
    mutate(
      # number of years in which both ranks were available
      n_compared = sum_pos + sum_neg + sum_zero,
      category = case_when(
        n_compared == 0 ~ NA_character_,
        sum_zero == n_compared ~ "Always Ranked the Same",
        sum_pos == n_compared ~ paste("Always Ranked Higher in", var1),
        sum_neg == n_compared ~ paste("Always Ranked Higher in", var2),
        TRUE ~ "Had Some Change"
      )
    )

  plt <- ggplot(
    ranked,
    aes(
      x = !!sym(rank1),
      y = !!sym(rank2),
      color = category,
      text = organization_name
    )
  ) +
    geom_point() +
    # identity line: points on it have the same rank in both variables
    geom_function(fun = function(x) x, color = "darkred", alpha = .8) +
    labs(
      x = paste0(var1, " Rank"),
      y = paste0(var2, " Rank"),
      color = "Consistency of Rankings",
      title = glue("Rank of {var2} vs. Rank of {var1}")
    ) +
    theme_c() +
    scale_color_brewer(palette = "Set2") +
    facet_wrap(~fiscal_year) +
    theme(
      plot.title = element_text(size = 14, hjust = .5, face = "bold"),
      plot.subtitle = element_text(hjust = .5, face = "italic", size = 14),
      axis.title = element_text(size = 13, face = "bold")
    )

  ggplotly(plt, margin = m, height = 550) %>% partial_bundle()
}
# Rank-vs-rank scatter plot (fiscal years 2011-2020), colored by the share of
# years in which each organization ranks higher in `var1` than in `var2`.
#
# Arguments:
#   var1, var2  Column names (character strings) of the variables to rank.
#   data        Data frame containing `fiscal_year`, `EIN`,
#               `organization_name` and the columns named by `var1`/`var2`.
#
# Ranks are computed within each fiscal year on the negated values (rank 1 is
# the largest value). `na.last = "keep"` is passed to rank() itself so missing
# values keep an NA rank; it was previously outside the call, where mutate()
# turned it into a literal `na.last` column and NAs were still ranked.
#
# rank_diff = var2 rank - var1 rank, so a positive difference means the
# organization sits higher (smaller rank number) in `var1` that year.
# prop_positive is, per EIN, the number of years with a positive difference
# divided by the number of years with a non-missing difference; it is NaN for
# an EIN with no such year.
#
# Returns the ggplotly() widget after partial_bundle(). `m` and `theme_c()`
# are taken from the calling environment; they are not defined here.
plot_ranks_by_consistency <- function(var1, var2, data) {
  rank1 <- paste0(var1, "_rank")
  rank2 <- paste0(var2, "_rank")

  ranked <- data %>%
    filter(fiscal_year > 2010 & fiscal_year < 2021) %>%
    group_by(fiscal_year) %>%
    mutate(
      "{var1}_rank" := rank(-!!sym(var1), na.last = "keep"),
      "{var2}_rank" := rank(-!!sym(var2), na.last = "keep"),
      rank_diff = !!sym(rank2) - !!sym(rank1)
    ) %>%
    group_by(EIN) %>%
    mutate(
      sum_pos = sum(rank_diff > 0, na.rm = TRUE),
      sum_neg = sum(rank_diff < 0, na.rm = TRUE),
      sum_zero = sum(rank_diff == 0, na.rm = TRUE)
    ) %>%
    ungroup() %>%
    mutate(prop_positive = sum_pos / (sum_pos + sum_neg + sum_zero))

  plt <- ggplot(
    ranked,
    aes(
      x = !!sym(rank1),
      y = !!sym(rank2),
      color = prop_positive,
      text = organization_name
    )
  ) +
    # identity line first, so the points are drawn on top of it
    geom_function(
      fun = function(x) x,
      color = "darkred",
      alpha = .8,
      n = 201
    ) +
    geom_point() +
    labs(
      x = paste0(var1, " Rank"),
      y = paste0(var2, " Rank"),
      title = glue("Rank of {var2} vs. Rank of {var1}"),
      color = glue("Proportion Where {var1}\nRanked Higher than\n{var2}")
    ) +
    theme_c(legend.text = element_text(size = 8)) +
    scale_color_distiller(palette = "PuOr") +
    facet_wrap(~fiscal_year) +
    theme(
      plot.title = element_text(size = 12, hjust = .5, face = "bold"),
      plot.subtitle = element_text(hjust = .5, face = "italic", size = 14),
      axis.title = element_text(size = 13, face = "bold")
    )

  ggplotly(plt, margin = m, height = 550) %>% partial_bundle()
}
#
# print_df_category <- function(data, var1, var2) {
# data %>%
# filter(fiscal_year > 2010 & fiscal_year < 2021) %>%
# group_by(fiscal_year) %>%
# # arrange(var1) %>%
# mutate("{var1}_rank" := rank(-!!sym(var1)), na.last = "keep") %>%
# # arrange(var2) %>%
# mutate("{var2}_rank" := rank(-!!sym(var2)), na.last = "keep") %>%
# mutate(rank_diff = !!sym(glue("{var2}_rank")) - !!sym(glue("{var1}_rank" ))) %>%
# group_by(EIN) %>%
# mutate(sum_pos = sum(rank_diff >0, na.rm=TRUE),
# sum_neg = sum(rank_diff < 0, na.rm=TRUE),
# sum_zero = sum(rank_diff ==0, na.rm=TRUE))%>%
# ungroup() %>%
# mutate(prop_positive = sum_pos / (sum_pos + sum_neg + sum_zero)) %>%
# group_by()
# }
#
# Scatter plot of `var2` (y) against `var1` (x) on their original scale, one
# panel per fiscal year, points colored by EIN (legend hidden).
#
# Arguments:
#   var1, var2  Column names (character strings) to put on the x and y axes.
#   data        Data frame containing `EIN`, `fiscal_year` and both columns.
#
# Returns the ggplot object.
plot_combo <- function(var1, var2, data) {
  text_theme <- theme(
    plot.title = element_text(size = 18, hjust = .5, face = "bold"),
    plot.subtitle = element_text(hjust = .5, face = "italic", size = 16),
    axis.title = element_text(size = 13, face = "bold"),
    legend.position = "none",
    axis.text.x = element_text(angle = 60, vjust = .6)
  )

  ggplot(data, aes(x = !!sym(var1), y = !!sym(var2), color = EIN)) +
    geom_point(alpha = .9) +
    facet_wrap(~fiscal_year) +
    viridis::scale_color_viridis(
      discrete = TRUE,
      option = "rocket",
      end = .9
    ) +
    # theme_bw() is a complete theme, so the tweaks must be added after it
    theme_bw() +
    text_theme +
    scale_x_continuous(labels = comma) +
    scale_y_continuous(labels = comma) +
    labs(
      title = paste0(var2, " vs. ", var1),
      subtitle = "Fill by EIN"
    )
}
# Wide form of the endowment table: one column per `variable_name`, filled
# from `value`.
endowment_data_wide <- pivot_wider(
  endowment_data,
  names_from = variable_name,
  values_from = value
)
Plotting Endowment Variables Against Each Other, By Year
# Variables to compare: every distinct variable name except those matching
# "EOY", "Admin" or "Grants".
vars <- unique(endowment_data$variable_name)
vars <- vars[!grepl("EOY|Admin|Grants", vars)]

# pairwise combinations of variables, one pair per row (columns V1, V2)
variable_combinations <- as.data.frame(t(combn(vars, 2)))

# When `all_plots` is FALSE only the first four pairs are plotted. head() is
# used instead of `[1:4, ]` because indexing past the last row of a data frame
# yields all-NA rows, which would then be passed on to plot_combo().
if (!all_plots) {
  variable_combinations <- head(variable_combinations, 4)
}

# Emit a tabset heading, then one tab per variable pair containing its plot.
cat("## Scale of Original Variables {.tabset} \n\n")
pwalk(variable_combinations, ~ {
  cat("### ", paste0(.x, ", ", .y), "\n\n")
  print(plot_combo(var1 = .x, var2 = .y, data = endowment_data_wide))
  cat("\n\n")
})
# Line chart of the end-of-year ("EOY") percentage columns over fiscal years:
# one panel per endowment category, one line per organization.
plt <- endowment_data_wide %>%
  select(contains("EOY"), fiscal_year, EIN, organization_name) %>%
  pivot_longer(cols = contains("EOY")) %>%
  mutate(
    # readable panel labels; a column name not listed here becomes NA
    name = case_when(
      name == "TermEndowmentBalanceEOYPct" ~
        "Temporarily restricted endowment\nEnd of Year Pct",
      name == "PrmnntEndowmentBalanceEOYPct" ~
        "Permanent endowment\nEnd of Year Pct",
      name == "BoardDesignatedBalanceEOYPct" ~
        "Board designated or quasi-endowment\nEnd of Year Pct"
    ),
    # the values are multiplied by 100 to match the percentage axis label
    value = 100 * value
  ) %>%
  filter(!is.na(value)) %>%
  ggplot(aes(x = fiscal_year, y = value, color = organization_name)) +
  geom_line() +
  facet_wrap(~name) +
  labs(
    title = "Where Endowments are Held, by Fiscal Year",
    x = "Fiscal Year",
    y = "Percentage of Endowment in Category"
  ) +
  scale_color_viridis(option = "rocket", discrete = TRUE) +
  theme_c(
    strip.text = element_text(margin = margin(3, 0, 20, 0), size = 9),
    plot.title = element_text(margin = margin(3, 0, 12, 0))
  )

ggplotly(plt, margin = m, height = 550) %>% partial_bundle()
Rankings
Relationships between how companies rank in different variables
# Build one rank-consistency widget per variable pair. The formula lambda is
# used with positional .x/.y because pmap() passes the data-frame columns as
# named arguments.
# (alternative: plot_ranks(var1 = .x, var2 = .y, data = endowment_data_wide))
plotlist <- pmap(
  variable_combinations,
  ~ partial_bundle(
    plot_ranks_by_consistency(
      var1 = .x,
      var2 = .y,
      data = endowment_data_wide
    )
  )
)

# Strip the list names before wrapping the widgets in a single tag list.
htmltools::tagList(unname(plotlist))